/* * R : A Computer Language for Statistical Data Analysis * Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka * Copyright (C) 1997--2015 The R Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/ * * * See ./printutils.c for general remarks on Printing * and the Encode.. utils. * * See ./format.c for the format_Foo_ functions. */ #ifdef HAVE_CONFIG_H #include #endif #include "Defn.h" #include #define imax2(x, y) ((x < y) ? y : x) #include "Print.h" #include "RBufferUtils.h" static R_StringBuffer cbuff = {NULL, 0, MAXELTSIZE}; /* .Internal(paste (args, sep, collapse)) .Internal(paste0(args, collapse)) * do_paste uses two passes to paste the arguments (in CAR(args)) together. * The first pass calculates the width of the paste buffer, * then it is alloc-ed and the second pass stuffs the information in. */ /* Note that NA_STRING is not handled separately here. This is deliberate -- see ?paste -- and implicitly coerces it to "NA" */ SEXP attribute_hidden do_paste(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP ans, collapse, sep, x; int sepw, u_sepw, ienc; R_xlen_t i, j, k, maxlen, nx, pwidth; const char *s, *cbuf, *csep=NULL, *u_csep=NULL; char *buf; Rboolean allKnown, anyKnown, use_UTF8, use_Bytes, sepASCII = TRUE, sepUTF8 = FALSE, sepBytes = FALSE, sepKnown = FALSE, use_sep = (PRIMVAL(op) == 0); const void *vmax; checkArity(op, args); /* We use formatting and so we must initialize printing. */ PrintDefaults(); /* Check the arguments */ x = CAR(args); if (!isVectorList(x)) error(_("invalid first argument")); nx = xlength(x); if(use_sep) { /* paste(..., sep, .) */ sep = CADR(args); if (!isString(sep) || LENGTH(sep) <= 0 || STRING_ELT(sep, 0) == NA_STRING) error(_("invalid separator")); sep = STRING_ELT(sep, 0); csep = translateChar(sep); u_sepw = sepw = (int) strlen(csep); // will be short sepASCII = strIsASCII(csep); sepKnown = ENC_KNOWN(sep) > 0; sepUTF8 = IS_UTF8(sep); sepBytes = IS_BYTES(sep); collapse = CADDR(args); } else { /* paste0(..., .) */ u_sepw = sepw = 0; sep = R_NilValue;/* -Wall */ collapse = CADR(args); } if (!isNull(collapse)) if(!isString(collapse) || LENGTH(collapse) <= 0 || STRING_ELT(collapse, 0) == NA_STRING) error(_("invalid '%s' argument"), "collapse"); if(nx == 0) return (!isNull(collapse)) ? mkString("") : allocVector(STRSXP, 0); /* Maximum argument length, coerce if needed */ maxlen = 0; for (j = 0; j < nx; j++) { if (!isString(VECTOR_ELT(x, j))) { /* formerly in R code: moved to C for speed */ SEXP call, xj = VECTOR_ELT(x, j); if(OBJECT(xj)) { /* method dispatch */ PROTECT(call = lang2(R_AsCharacterSymbol, xj)); SET_VECTOR_ELT(x, j, eval(call, env)); UNPROTECT(1); } else if (isSymbol(xj)) SET_VECTOR_ELT(x, j, ScalarString(PRINTNAME(xj))); else SET_VECTOR_ELT(x, j, coerceVector(xj, STRSXP)); if (!isString(VECTOR_ELT(x, j))) error(_("non-string argument to internal 'paste'")); } if(XLENGTH(VECTOR_ELT(x, j)) > maxlen) maxlen = XLENGTH(VECTOR_ELT(x, j)); } if(maxlen == 0) return (!isNull(collapse)) ? mkString("") : allocVector(STRSXP, 0); PROTECT(ans = allocVector(STRSXP, maxlen)); for (i = 0; i < maxlen; i++) { /* Strategy for marking the encoding: if all inputs (including * the separator) are ASCII, so is the output and we don't * need to mark. Otherwise if all non-ASCII inputs are of * declared encoding, we should mark. * Need to be careful only to include separator if it is used. */ anyKnown = FALSE; allKnown = TRUE; use_UTF8 = FALSE; use_Bytes = FALSE; if(nx > 1) { allKnown = sepKnown || sepASCII; anyKnown = sepKnown; use_UTF8 = sepUTF8; use_Bytes = sepBytes; } pwidth = 0; for (j = 0; j < nx; j++) { k = XLENGTH(VECTOR_ELT(x, j)); if (k > 0) { SEXP cs = STRING_ELT(VECTOR_ELT(x, j), i % k); if(IS_UTF8(cs)) use_UTF8 = TRUE; if(IS_BYTES(cs)) use_Bytes = TRUE; } } if (use_Bytes) use_UTF8 = FALSE; vmax = vmaxget(); for (j = 0; j < nx; j++) { k = XLENGTH(VECTOR_ELT(x, j)); if (k > 0) { if(use_Bytes) pwidth += strlen(CHAR(STRING_ELT(VECTOR_ELT(x, j), i % k))); else if(use_UTF8) pwidth += strlen(translateCharUTF8(STRING_ELT(VECTOR_ELT(x, j), i % k))); else pwidth += strlen(translateChar(STRING_ELT(VECTOR_ELT(x, j), i % k))); vmaxset(vmax); } } if(use_sep) { if (use_UTF8 && !u_csep) { u_csep = translateCharUTF8(sep); u_sepw = (int) strlen(u_csep); // will be short } pwidth += (nx - 1) * (use_UTF8 ? u_sepw : sepw); } if (pwidth > INT_MAX) error(_("result would exceed 2^31-1 bytes")); cbuf = buf = R_AllocStringBuffer(pwidth, &cbuff); vmax = vmaxget(); for (j = 0; j < nx; j++) { k = XLENGTH(VECTOR_ELT(x, j)); if (k > 0) { SEXP cs = STRING_ELT(VECTOR_ELT(x, j), i % k); if (use_UTF8) { s = translateCharUTF8(cs); strcpy(buf, s); buf += strlen(s); } else { s = use_Bytes ? CHAR(cs) : translateChar(cs); strcpy(buf, s); buf += strlen(s); allKnown = allKnown && (strIsASCII(s) || (ENC_KNOWN(cs)> 0)); anyKnown = anyKnown || (ENC_KNOWN(cs)> 0); } } if (sepw != 0 && j != nx - 1) { if (use_UTF8) { strcpy(buf, u_csep); buf += u_sepw; } else { strcpy(buf, csep); buf += sepw; } } vmax = vmaxget(); } ienc = 0; if(use_UTF8) ienc = CE_UTF8; else if(use_Bytes) ienc = CE_BYTES; else if(anyKnown && allKnown) { if(known_to_be_latin1) ienc = CE_LATIN1; if(known_to_be_utf8) ienc = CE_UTF8; } SET_STRING_ELT(ans, i, mkCharCE(cbuf, ienc)); } /* Now collapse, if required. */ if(collapse != R_NilValue && (nx = XLENGTH(ans)) > 0) { sep = STRING_ELT(collapse, 0); use_UTF8 = IS_UTF8(sep); use_Bytes = IS_BYTES(sep); for (i = 0; i < nx; i++) { if(IS_UTF8(STRING_ELT(ans, i))) use_UTF8 = TRUE; if(IS_BYTES(STRING_ELT(ans, i))) use_Bytes = TRUE; } if(use_Bytes) { csep = CHAR(sep); use_UTF8 = FALSE; } else if(use_UTF8) csep = translateCharUTF8(sep); else csep = translateChar(sep); sepw = (int) strlen(csep); anyKnown = ENC_KNOWN(sep) > 0; allKnown = anyKnown || strIsASCII(csep); pwidth = 0; vmax = vmaxget(); for (i = 0; i < nx; i++) if(use_UTF8) { pwidth += strlen(translateCharUTF8(STRING_ELT(ans, i))); vmaxset(vmax); } else /* already translated */ pwidth += strlen(CHAR(STRING_ELT(ans, i))); pwidth += (nx - 1) * sepw; if (pwidth > INT_MAX) error(_("result would exceed 2^31-1 bytes")); cbuf = buf = R_AllocStringBuffer(pwidth, &cbuff); vmax = vmaxget(); for (i = 0; i < nx; i++) { if(i > 0) { strcpy(buf, csep); buf += sepw; } if(use_UTF8) s = translateCharUTF8(STRING_ELT(ans, i)); else /* already translated */ s = CHAR(STRING_ELT(ans, i)); strcpy(buf, s); while (*buf) buf++; allKnown = allKnown && (strIsASCII(s) || (ENC_KNOWN(STRING_ELT(ans, i)) > 0)); anyKnown = anyKnown || (ENC_KNOWN(STRING_ELT(ans, i)) > 0); if(use_UTF8) vmaxset(vmax); } UNPROTECT(1); ienc = CE_NATIVE; if(use_UTF8) ienc = CE_UTF8; else if(use_Bytes) ienc = CE_BYTES; else if(anyKnown && allKnown) { if(known_to_be_latin1) ienc = CE_LATIN1; if(known_to_be_utf8) ienc = CE_UTF8; } PROTECT(ans = allocVector(STRSXP, 1)); SET_STRING_ELT(ans, 0, mkCharCE(cbuf, ienc)); } R_FreeStringBufferL(&cbuff); UNPROTECT(1); return ans; } SEXP attribute_hidden do_filepath(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP ans, sep, x; int i, j, k, ln, maxlen, nx, nzero, pwidth, sepw; const char *s, *csep, *cbuf; char *buf; checkArity(op, args); /* Check the arguments */ x = CAR(args); if (!isVectorList(x)) error(_("invalid first argument")); nx = length(x); if(nx == 0) return allocVector(STRSXP, 0); sep = CADR(args); if (!isString(sep) || LENGTH(sep) <= 0 || STRING_ELT(sep, 0) == NA_STRING) error(_("invalid separator")); sep = STRING_ELT(sep, 0); csep = CHAR(sep); sepw = (int) strlen(csep); /* hopefully 1 */ /* Any zero-length argument gives zero-length result */ maxlen = 0; nzero = 0; for (j = 0; j < nx; j++) { if (!isString(VECTOR_ELT(x, j))) { /* formerly in R code: moved to C for speed */ SEXP call, xj = VECTOR_ELT(x, j); if(OBJECT(xj)) { /* method dispatch */ PROTECT(call = lang2(R_AsCharacterSymbol, xj)); SET_VECTOR_ELT(x, j, eval(call, env)); UNPROTECT(1); } else if (isSymbol(xj)) SET_VECTOR_ELT(x, j, ScalarString(PRINTNAME(xj))); else SET_VECTOR_ELT(x, j, coerceVector(xj, STRSXP)); if (!isString(VECTOR_ELT(x, j))) error(_("non-string argument to Internal paste")); } ln = LENGTH(VECTOR_ELT(x, j)); if(ln > maxlen) maxlen = ln; if(ln == 0) {nzero++; break;} } if(nzero || maxlen == 0) return allocVector(STRSXP, 0); PROTECT(ans = allocVector(STRSXP, maxlen)); for (i = 0; i < maxlen; i++) { pwidth = 0; for (j = 0; j < nx; j++) { k = LENGTH(VECTOR_ELT(x, j)); pwidth += (int) strlen(translateChar(STRING_ELT(VECTOR_ELT(x, j), i % k))); } pwidth += (nx - 1) * sepw; cbuf = buf = R_AllocStringBuffer(pwidth, &cbuff); for (j = 0; j < nx; j++) { k = LENGTH(VECTOR_ELT(x, j)); if (k > 0) { s = translateChar(STRING_ELT(VECTOR_ELT(x, j), i % k)); strcpy(buf, s); buf += strlen(s); } if (j != nx - 1 && sepw != 0) { strcpy(buf, csep); buf += sepw; } } #ifdef Win32 // Trailing seps are invalid for file paths except for / and d:/ if(streql(csep, "/") || streql(csep, "\\")) { if(buf > cbuf) { buf--; if(*buf == csep[0] && buf > cbuf && (buf != cbuf+2 || cbuf[1] != ':')) *buf = '\0'; } } #endif SET_STRING_ELT(ans, i, mkChar(cbuf)); } R_FreeStringBufferL(&cbuff); UNPROTECT(1); return ans; } /* format.default(x, trim, digits, nsmall, width, justify, na.encode, scientific, decimal.mark) */ SEXP attribute_hidden do_format(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP l, x, y, swd; int il, digits, trim = 0, nsmall = 0, wd = 0, adj = -1, na, sci = 0; int w, d, e; int wi, di, ei, scikeep; const char *strp; R_xlen_t i, n; checkArity(op, args); PrintDefaults(); scikeep = R_print.scipen; if (isEnvironment(x = CAR(args))) { return mkString(EncodeEnvironment(x)); } else if (!isVector(x)) error(_("first argument must be atomic")); args = CDR(args); trim = asLogical(CAR(args)); if (trim == NA_INTEGER) error(_("invalid '%s' argument"), "trim"); args = CDR(args); if (!isNull(CAR(args))) { digits = asInteger(CAR(args)); if (digits == NA_INTEGER || digits < R_MIN_DIGITS_OPT || digits > R_MAX_DIGITS_OPT) error(_("invalid '%s' argument"), "digits"); R_print.digits = digits; } args = CDR(args); nsmall = asInteger(CAR(args)); if (nsmall == NA_INTEGER || nsmall < 0 || nsmall > 20) error(_("invalid '%s' argument"), "nsmall"); args = CDR(args); if (isNull(swd = CAR(args))) wd = 0; else wd = asInteger(swd); if(wd == NA_INTEGER) error(_("invalid '%s' argument"), "width"); args = CDR(args); adj = asInteger(CAR(args)); if(adj == NA_INTEGER || adj < 0 || adj > 3) error(_("invalid '%s' argument"), "justify"); args = CDR(args); na = asLogical(CAR(args)); if(na == NA_LOGICAL) error(_("invalid '%s' argument"), "na.encode"); args = CDR(args); if(LENGTH(CAR(args)) != 1) error(_("invalid '%s' argument"), "scientific"); if(isLogical(CAR(args))) { int tmp = LOGICAL(CAR(args))[0]; if(tmp == NA_LOGICAL) sci = NA_INTEGER; else sci = tmp > 0 ?-100 : 100; } else if (isNumeric(CAR(args))) { sci = asInteger(CAR(args)); } else error(_("invalid '%s' argument"), "scientific"); if(sci != NA_INTEGER) R_print.scipen = sci; args = CDR(args); // copy/paste from "OutDec" part of ./options.c if (TYPEOF(CAR(args)) != STRSXP || LENGTH(CAR(args)) != 1) error(_("invalid '%s' argument"), "decimal.mark"); char *my_OutDec; if(STRING_ELT(CAR(args), 0) == NA_STRING) my_OutDec = OutDec; // default else { static char sdec[11]; // not warning here by default for now #ifdef _WARN_decimal_mark_non_1 if(R_nchar(STRING_ELT(CAR(args), 0), Chars, /* allowNA = */ FALSE, /* keepNA = */ FALSE, "decimal.mark") != 1) // will become an error warning(_("'decimal.mark' must be a string of one character")); #endif strncpy(sdec, CHAR(STRING_ELT(CAR(args), 0)), 10); sdec[10] = '\0'; my_OutDec = sdec; } if ((n = XLENGTH(x)) <= 0) { PROTECT(y = allocVector(STRSXP, 0)); } else { switch (TYPEOF(x)) { case LGLSXP: PROTECT(y = allocVector(STRSXP, n)); if (trim) w = 0; else formatLogical(LOGICAL(x), n, &w); w = imax2(w, wd); for (i = 0; i < n; i++) { strp = EncodeLogical(LOGICAL(x)[i], w); SET_STRING_ELT(y, i, mkChar(strp)); } break; case INTSXP: PROTECT(y = allocVector(STRSXP, n)); if (trim) w = 0; else formatInteger(INTEGER(x), n, &w); w = imax2(w, wd); for (i = 0; i < n; i++) { strp = EncodeInteger(INTEGER(x)[i], w); SET_STRING_ELT(y, i, mkChar(strp)); } break; case REALSXP: formatReal(REAL(x), n, &w, &d, &e, nsmall); if (trim) w = 0; w = imax2(w, wd); PROTECT(y = allocVector(STRSXP, n)); for (i = 0; i < n; i++) { strp = EncodeReal0(REAL(x)[i], w, d, e, my_OutDec); SET_STRING_ELT(y, i, mkChar(strp)); } break; case CPLXSXP: formatComplex(COMPLEX(x), n, &w, &d, &e, &wi, &di, &ei, nsmall); if (trim) wi = w = 0; w = imax2(w, wd); wi = imax2(wi, wd); PROTECT(y = allocVector(STRSXP, n)); for (i = 0; i < n; i++) { strp = EncodeComplex(COMPLEX(x)[i], w, d, e, wi, di, ei, my_OutDec); SET_STRING_ELT(y, i, mkChar(strp)); } break; case STRSXP: { /* this has to be different from formatString/EncodeString as we don't actually want to encode here */ const char *s; char *q; int b, b0, cnt = 0, j; SEXP s0, xx; /* This is clumsy, but it saves rewriting and re-testing this complex code */ PROTECT(xx = duplicate(x)); for (i = 0; i < n; i++) { SEXP tmp = STRING_ELT(xx, i); if(IS_BYTES(tmp)) { const char *p = CHAR(tmp), *q; char *pp = R_alloc(4*strlen(p)+1, 1), *qq = pp, buf[5]; for (q = p; *q; q++) { unsigned char k = (unsigned char) *q; if (k >= 0x20 && k < 0x80) { *qq++ = *q; } else { snprintf(buf, 5, "\\x%02x", k); for(int j = 0; j < 4; j++) *qq++ = buf[j]; } } *qq = '\0'; s = pp; } else s = translateChar(tmp); if(s != CHAR(tmp)) SET_STRING_ELT(xx, i, mkChar(s)); } w = wd; if (adj != Rprt_adj_none) { for (i = 0; i < n; i++) if (STRING_ELT(xx, i) != NA_STRING) w = imax2(w, Rstrlen(STRING_ELT(xx, i), 0)); else if (na) w = imax2(w, R_print.na_width); } else w = 0; /* now calculate the buffer size needed, in bytes */ for (i = 0; i < n; i++) if (STRING_ELT(xx, i) != NA_STRING) { il = Rstrlen(STRING_ELT(xx, i), 0); cnt = imax2(cnt, LENGTH(STRING_ELT(xx, i)) + imax2(0, w-il)); } else if (na) cnt = imax2(cnt, R_print.na_width + imax2(0, w-R_print.na_width)); R_CheckStack2(cnt+1); char buff[cnt+1]; PROTECT(y = allocVector(STRSXP, n)); for (i = 0; i < n; i++) { if(!na && STRING_ELT(xx, i) == NA_STRING) { SET_STRING_ELT(y, i, NA_STRING); } else { q = buff; if(STRING_ELT(xx, i) == NA_STRING) s0 = R_print.na_string; else s0 = STRING_ELT(xx, i) ; s = CHAR(s0); il = Rstrlen(s0, 0); b = w - il; if(b > 0 && adj != Rprt_adj_left) { b0 = (adj == Rprt_adj_centre) ? b/2 : b; for(j = 0 ; j < b0 ; j++) *q++ = ' '; b -= b0; } for(j = 0; j < LENGTH(s0); j++) *q++ = *s++; if(b > 0 && adj != Rprt_adj_right) for(j = 0 ; j < b ; j++) *q++ = ' '; *q = '\0'; SET_STRING_ELT(y, i, mkChar(buff)); } } } UNPROTECT(2); /* xx , y */ PROTECT(y); break; default: error(_("Impossible mode ( x )")); y = R_NilValue;/* -Wall */ } } if((l = getAttrib(x, R_DimSymbol)) != R_NilValue) { setAttrib(y, R_DimSymbol, l); if((l = getAttrib(x, R_DimNamesSymbol)) != R_NilValue) setAttrib(y, R_DimNamesSymbol, l); } else if((l = getAttrib(x, R_NamesSymbol)) != R_NilValue) setAttrib(y, R_NamesSymbol, l); /* In case something else forgets to set PrintDefaults(), PR#14477 */ R_print.scipen = scikeep; UNPROTECT(1); /* y */ return y; } /* format.info(obj) --> 3 integers (w,d,e) with the formatting information * w = total width (#{chars}) per item * d = #{digits} to RIGHT of "." * e = {0:2}. 0: Fixpoint; * 1,2: exponential with 2/3 digit expon. * * for complex : 2 x 3 integers for (Re, Im) */ SEXP attribute_hidden do_formatinfo(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP x; int digits, nsmall, no = 1, w, d, e, wi, di, ei; checkArity(op, args); x = CAR(args); R_xlen_t n = XLENGTH(x); PrintDefaults(); if (!isNull(CADR(args))) { digits = asInteger(CADR(args)); if (digits == NA_INTEGER || digits < R_MIN_DIGITS_OPT || digits > R_MAX_DIGITS_OPT) error(_("invalid '%s' argument"), "digits"); R_print.digits = digits; } nsmall = asInteger(CADDR(args)); if (nsmall == NA_INTEGER || nsmall < 0 || nsmall > 20) error(_("invalid '%s' argument"), "nsmall"); w = 0; d = 0; e = 0; switch (TYPEOF(x)) { case RAWSXP: formatRaw(RAW(x), n, &w); break; case LGLSXP: formatLogical(LOGICAL(x), n, &w); break; case INTSXP: formatInteger(INTEGER(x), n, &w); break; case REALSXP: no = 3; formatReal(REAL(x), n, &w, &d, &e, nsmall); break; case CPLXSXP: no = 6; wi = di = ei = 0; formatComplex(COMPLEX(x), n, &w, &d, &e, &wi, &di, &ei, nsmall); break; case STRSXP: for (R_xlen_t i = 0; i < n; i++) if (STRING_ELT(x, i) != NA_STRING) { int il = Rstrlen(STRING_ELT(x, i), 0); if (il > w) w = il; } break; default: error(_("atomic vector arguments only")); } x = allocVector(INTSXP, no); INTEGER(x)[0] = w; if(no > 1) { INTEGER(x)[1] = d; INTEGER(x)[2] = e; } if(no > 3) { INTEGER(x)[3] = wi; INTEGER(x)[4] = di; INTEGER(x)[5] = ei; } return x; }