/* * R : A Computer Language for Statistical Data Analysis * Copyright (C) 1997--2018 The R Core Team * Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/ * * EXPORTS: * * OneIndex() -- used for "[[<-" in ./subassign.c * get1index() -- used for "[[" in ./subassign.c & subset.c * vectorIndex() -- used for "[[" and "[[<-" with a vector arg * mat2indsub() -- for "mat[i]" " " " * makeSubscript() -- for "[" and "[<-" in ./subset.c and ./subassign.c, * and "[[<-" with a scalar in ./subassign.c * arraySubscript() -- for "[i,j,..." and "[<-..." in ./subset.c, ./subassign.c */ #ifdef HAVE_CONFIG_H #include #endif #include #include /* interval at which to check interrupts, a guess (~subsecond on current hw) */ #define NINTERRUPT 10000000 /* We might get a call with R_NilValue from subassignment code */ #define ECALL(call, yy) if(call == R_NilValue) error(yy); else errorcall(call, yy); #define ECALL3(call, yy, A) if(call == R_NilValue) error(yy, A); else errorcall(call, yy, A); /* This allows for the unusual case where x is of length 2, and x[[-m]] selects one element for m = 1, 2. So 'len' is only used if it is 2 and i is negative. */ static R_INLINE int integerOneIndex(int i, R_xlen_t len, SEXP call) { int indx = -1; if (i > 0) /* a regular 1-based index from R */ indx = i - 1; else if (i == 0 || len < 2) { ECALL3(call, _("attempt to select less than one element in %s"), "integerOneIndex"); } else if (len == 2 && i > -3) indx = 2 + i; else { ECALL3(call, _("attempt to select more than one element in %s"), "integerOneIndex"); } return indx; } /* Utility used (only in) do_subassign2_dflt(), i.e. "[[<-" in ./subassign.c : */ R_xlen_t attribute_hidden OneIndex(SEXP x, SEXP s, R_xlen_t nx, int partial, SEXP *newname, int pos, SEXP call) { SEXP names; R_xlen_t i, indx; const void *vmax; if (pos < 0 && length(s) > 1) { ECALL3(call, _("attempt to select more than one element in %s"), "OneIndex"); } if (pos < 0 && length(s) < 1) { ECALL3(call, _("attempt to select less than one element in %s"), "OneIndex"); } if(pos < 0) pos = 0; indx = -1; *newname = R_NilValue; switch(TYPEOF(s)) { case LGLSXP: case INTSXP: indx = integerOneIndex(INTEGER_ELT(s, pos), nx, call); break; case REALSXP: indx = integerOneIndex((int)REAL_ELT(s, pos), nx, call); break; case STRSXP: vmax = vmaxget(); names = getAttrib(x, R_NamesSymbol); if (names != R_NilValue) { PROTECT(names); /* Try for exact match */ for (i = 0; i < nx; i++) { const char *tmp = translateChar(STRING_ELT(names, i)); if (!tmp[0]) continue; if (streql(tmp, translateChar(STRING_ELT(s, pos)))) { indx = i; break; } } // Try for partial match -- not ever used in current R (partial is 0) if (partial && indx < 0) { size_t l = strlen(translateChar(STRING_ELT(s, pos))); for(i = 0; i < nx; i++) { const char *tmp = translateChar(STRING_ELT(names, i)); if (!tmp[0]) continue; if(!strncmp(tmp, translateChar(STRING_ELT(s, pos)), l)) { if(indx == -1 ) indx = i; else indx = -2; } } } UNPROTECT(1); /* names */ } if (indx == -1) indx = nx; *newname = STRING_ELT(s, pos); vmaxset(vmax); break; case SYMSXP: vmax = vmaxget(); names = getAttrib(x, R_NamesSymbol); if (names != R_NilValue) { PROTECT(names); for (i = 0; i < nx; i++) if (streql(translateChar(STRING_ELT(names, i)), translateChar(PRINTNAME(s)))) { indx = i; break; } UNPROTECT(1); /* names */ } if (indx == -1) indx = nx; *newname = PRINTNAME(s); vmaxset(vmax); break; default: ECALL3(call, _("invalid subscript type '%s'"), type2char(TYPEOF(s))); } return indx; } /* used here and in subset.c and subassign.c */ R_xlen_t attribute_hidden get1index(SEXP s, SEXP names, R_xlen_t len, int pok, int pos, SEXP call) { /* Get a single index for the [[ and [[<- operators. Checks that only one index is being selected. Returns -1 for no match. s is the subscript len is the length of the object or dimension, with names its (dim)names. pos is len-1 or -1 for [[, -1 for [[<- -1 means use the only element of length-1 s. pok : is "partial ok" ? if pok is -1, warn if partial matching occurs, but allow. */ int warn_pok = 0; const char *ss, *cur_name; R_xlen_t indx; const void *vmax; if (pok == -1) { pok = 1; warn_pok = 1; } if (pos < 0 && length(s) != 1) { if (length(s) > 1) { ECALL3(call, _("attempt to select more than one element in %s"), "get1index"); } else { ECALL3(call, _("attempt to select less than one element in %s"), "get1index"); } } else if(pos >= length(s)) { ECALL(call, _("internal error in use of recursive indexing")); } if(pos < 0) pos = 0; indx = -1; switch (TYPEOF(s)) { case LGLSXP: case INTSXP: { int i = INTEGER_ELT(s, pos); if (i != NA_INTEGER) indx = integerOneIndex(i, len, call); break; } case REALSXP: { double dblind = REAL_ELT(s, pos); if(!ISNAN(dblind)) { /* see comment above integerOneIndex */ if (dblind > 0) indx = (R_xlen_t)(dblind - 1); else if (dblind == 0 || len < 2) { ECALL3(call, _("attempt to select less than one element in %s"), "get1index "); } else if (len == 2 && dblind > -3) indx = (R_xlen_t)(2 + dblind); else { ECALL3(call, _("attempt to select more than one element in %s"), "get1index "); } } break; } case STRSXP: /* NA matches nothing */ if(STRING_ELT(s, pos) == NA_STRING) break; /* "" matches nothing: see names.Rd */ if(!CHAR(STRING_ELT(s, pos))[0]) break; /* Try for exact match */ vmax = vmaxget(); ss = translateChar(STRING_ELT(s, pos)); for (R_xlen_t i = 0; i < xlength(names); i++) if (STRING_ELT(names, i) != NA_STRING) { if (streql(translateChar(STRING_ELT(names, i)), ss)) { indx = i; break; } } /* Try for partial match */ if (pok && indx < 0) { size_t len = strlen(ss); for(R_xlen_t i = 0; i < xlength(names); i++) { if (STRING_ELT(names, i) != NA_STRING) { cur_name = translateChar(STRING_ELT(names, i)); if(!strncmp(cur_name, ss, len)) { if(indx == -1) {/* first one */ indx = i; if (warn_pok) { if (call == R_NilValue) warning(_("partial match of '%s' to '%s'"), ss, cur_name); else warningcall(call, _("partial match of '%s' to '%s'"), ss, cur_name); } } else { indx = -2;/* more than one partial match */ if (warn_pok) /* already given context */ warningcall(R_NilValue, _("further partial match of '%s' to '%s'"), ss, cur_name); break; } } } } } vmaxset(vmax); break; case SYMSXP: vmax = vmaxget(); for (R_xlen_t i = 0; i < xlength(names); i++) if (STRING_ELT(names, i) != NA_STRING && streql(translateChar(STRING_ELT(names, i)), CHAR(PRINTNAME(s)))) { indx = i; vmaxset(vmax); break; } break; default: ECALL3(call, _("invalid subscript type '%s'"), type2char(TYPEOF(s))); } return indx; } /* This is used for [[ and [[<- with a vector of indices of length > 1 . x is a list or pairlist, and it is indexed recusively from level start to level stop-1. ( 0...len-1 or 0..len-2 then len-1). For [[<- it needs to duplicate if substructure might be shared. */ SEXP attribute_hidden vectorIndex(SEXP x, SEXP thesub, int start, int stop, int pok, SEXP call, Rboolean dup) { int i; R_xlen_t offset; SEXP cx; /* sanity check */ if (dup && MAYBE_SHARED(x)) error("should only be called in an assignment context."); for(i = start; i < stop; i++) { if(!isVectorList(x) && !isPairList(x)) { if (i) errorcall(call, _("recursive indexing failed at level %d\n"), i+1); else errorcall(call, _("attempt to select more than one element in %s"), "vectorIndex"); } PROTECT(x); SEXP names = PROTECT(getAttrib(x, R_NamesSymbol)); offset = get1index(thesub, names, xlength(x), pok, i, call); UNPROTECT(2); /* x, names */ if(offset < 0 || offset >= xlength(x)) errorcall(call, _("no such index at level %d\n"), i+1); if(isPairList(x)) { #ifdef LONG_VECTOR_SUPPORT if (offset > R_SHORT_LEN_MAX) error("invalid subscript for pairlist"); #endif cx = nthcdr(x, (int) offset); RAISE_NAMED(CAR(x), NAMED(x)); x = CAR(cx); if (dup && MAYBE_SHARED(x)) { PROTECT(cx); x = shallow_duplicate(x); SETCAR(cx, x); UNPROTECT(1); /* cx */ } } else { cx = x; x = VECTOR_ELT(x, offset); RAISE_NAMED(x, NAMED(cx)); if (dup && MAYBE_SHARED(x)) { PROTECT(cx); x = shallow_duplicate(x); SET_VECTOR_ELT(cx, offset, x); UNPROTECT(1); /* cx */ } } } return x; } /* Special Matrix Subscripting: Handles the case x[i] where x is an n-way array and i is a matrix with n columns. This code returns a vector containing the subscripts to be extracted when x is regarded as unravelled. Negative indices are not allowed. A zero/NA anywhere in a row will cause a zero/NA in the same position in the result. */ SEXP attribute_hidden mat2indsub(SEXP dims, SEXP s, SEXP call) { int nrs = nrows(s); R_xlen_t NR = nrs; SEXP rvec; int ndim = LENGTH(dims); const int *pdims = INTEGER_RO(dims); if (ncols(s) != ndim) { ECALL(call, _("incorrect number of columns in matrix subscript")); } #ifdef LONG_VECTOR_SUPPORT /* Check if it is a long vector we need to index */ R_xlen_t len = 1; for (int j = 0; j < ndim; j++) len *= pdims[j]; if(len > R_SHORT_LEN_MAX) { PROTECT(rvec = allocVector(REALSXP, nrs)); double *rv = REAL(rvec); for (int i = 0; i < nrs; i++) rv[i] = 1.; // 1-based. if (TYPEOF(s) == REALSXP) { for (int i = 0; i < nrs; i++) { R_xlen_t tdim = 1; const double *ps = REAL_RO(s); for (int j = 0; j < ndim; j++) { double k = ps[i + j * NR]; if(ISNAN(k)) {rv[i] = NA_REAL; break;} if(k < 0) { ECALL(call, _("negative values are not allowed in a matrix subscript")); } if(k == 0.) {rv[i] = 0.; break;} if (k > pdims[j]) { ECALL(call, _("subscript out of bounds")); } rv[i] += (k - 1.) * tdim; tdim *= pdims[j]; } } } else { s = coerceVector(s, INTSXP); const int *ps = INTEGER_RO(s); for (int i = 0; i < nrs; i++) { R_xlen_t tdim = 1; for (int j = 0; j < ndim; j++) { int k = ps[i + j * NR]; if(k == NA_INTEGER) {rv[i] = NA_REAL; break;} if(k < 0) { ECALL(call, _("negative values are not allowed in a matrix subscript")); } if(k == 0) {rv[i] = 0.; break;} if (k > pdims[j]) { ECALL(call, _("subscript out of bounds")); } rv[i] += (double) ((k - 1) * tdim); tdim *= pdims[j]; } } } } else #endif { PROTECT(rvec = allocVector(INTSXP, nrs)); int *iv = INTEGER(rvec); for (int i = 0; i < nrs; i++) iv[i] = 1; // 1-based. s = coerceVector(s, INTSXP); int *ps = INTEGER(s); for (int i = 0; i < nrs; i++) { int tdim = 1; for (int j = 0; j < ndim; j++) { int k = ps[i + j * NR]; if(k == NA_INTEGER) {iv[i] = NA_INTEGER; break;} if(k < 0) { ECALL(call, _("negative values are not allowed in a matrix subscript")); } if(k == 0) {iv[i] = 0; break;} if (k > pdims[j]) { ECALL(call, _("subscript out of bounds")); } iv[i] += (k - 1) * tdim; tdim *= pdims[j]; } } } UNPROTECT(1); return rvec; } /* Special Matrix Subscripting: For the case x[i] where x is an n-way array and i is a character matrix with n columns, this code converts i to an integer matrix by matching against the dimnames of x. NA values in any row of i propagate to the result. Unmatched entries result in a subscript out of bounds error. */ SEXP attribute_hidden strmat2intmat(SEXP s, SEXP dnamelist, SEXP call) { /* XXX: assumes all args are protected */ int nr = nrows(s), i, j, v; R_xlen_t idx, NR = nr; SEXP dnames, snames, si, sicol, s_elt; PROTECT(snames = allocVector(STRSXP, nr)); PROTECT(si = allocVector(INTSXP, xlength(s))); dimgets(si, getAttrib(s, R_DimSymbol)); int *psi = INTEGER(si); for (i = 0; i < length(dnamelist); i++) { dnames = VECTOR_ELT(dnamelist, i); for (j = 0; j < nr; j++) SET_STRING_ELT(snames, j, STRING_ELT(s, j + (i * NR))); PROTECT(sicol = match(dnames, snames, 0)); for (j = 0; j < nr; j++) { v = INTEGER_ELT(sicol, j); idx = j + (i * NR); s_elt = STRING_ELT(s, idx); if (s_elt == NA_STRING) v = NA_INTEGER; if (!CHAR(s_elt)[0]) v = 0; /* disallow "" match */ if (v == 0) errorcall(call, _("subscript out of bounds")); psi[idx] = v; } UNPROTECT(1); } UNPROTECT(2); return si; } static SEXP nullSubscript(R_xlen_t n) { SEXP indx; #ifdef LONG_VECTOR_SUPPORT if (n > R_SHORT_LEN_MAX) { indx = allocVector(REALSXP, n); double *pindx = REAL(indx); for (R_xlen_t i = 0; i < n; i++) pindx[i] = (double)(i + 1); } else #endif { indx = allocVector(INTSXP, n); int *pindx = INTEGER(indx); for (int i = 0; i < n; i++) pindx[i] = i + 1; } return indx; } static SEXP logicalSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, R_xlen_t *stretch, SEXP call) { R_xlen_t count, i, nmax, i1, i2; int canstretch; SEXP indx; canstretch = *stretch > 0; if (!canstretch && ns > nx) { ECALL(call, _("(subscript) logical subscript too long")); } nmax = (ns > nx) ? ns : nx; *stretch = (ns > nx) ? ns : 0; if (ns == 0) return(allocVector(INTSXP, 0)); const int *ps = LOGICAL_RO(s); /* Calling LOCICAL_RO here may force a large allocation, but no larger than the one made by R_alloc below. This could use rewriting to better handle a sparse logical index. */ #ifdef LONG_VECTOR_SUPPORT if (nmax > R_SHORT_LEN_MAX) { if (ns == nmax) { /* no recycling - use fast single-index code */ const void *vmax = vmaxget(); double *buf = (double *) R_alloc(nmax, sizeof(double)); count = 0; R_ITERATE_CHECK(NINTERRUPT, nmax, i, if (ps[i]) { if (ps[i] == NA_LOGICAL) buf[count++] = NA_REAL; else buf[count++] = (double)(i + 1); }); PROTECT(indx = allocVector(REALSXP, count)); memcpy(REAL(indx), buf, sizeof(double) * count); vmaxset(vmax); UNPROTECT(1); return indx; } count = 0; /* we only need to scan s once even if we recycle, just remember the total count as well as the count for the last incomplete chunk (if any) */ i1 = (ns < nmax) ? (nmax % ns) : 0; if (i1 > 0) { /* last recycling chunk is incomplete - we have to get the truncated count as well */ R_xlen_t rem = 0; for (i = 0; i < ns; i++) { if (i == i1) rem = count; if (ps[i]) count++; } count = count * (nmax / ns) + rem; } else { /* nested recycling, total is sufficient */ for (i = 0; i < ns; i++) if (ps[i]) count++; count *= nmax / ns; } PROTECT(indx = allocVector(REALSXP, count)); double *pindx = REAL(indx); count = 0; MOD_ITERATE_CHECK(NINTERRUPT, nmax, ns, nmax, i, i1, i2, if (ps[i1]) { if (ps[i1] == NA_LOGICAL) pindx[count++] = NA_REAL; else pindx[count++] = (double)(i + 1); }); UNPROTECT(1); return indx; } #endif // else --- the same code for non-long vectors -------------------------- if (ns == nmax) { /* no recycling - use fast single-index code */ const void *vmax = vmaxget(); int *buf = (int *) R_alloc(nmax, sizeof(int)); count = 0; R_ITERATE_CHECK(NINTERRUPT, nmax, i, if (ps[i]) { if (ps[i] == NA_LOGICAL) buf[count++] = NA_INTEGER; else buf[count++] = (int)(i + 1); }); PROTECT(indx = allocVector(INTSXP, count)); memcpy(INTEGER(indx), buf, sizeof(int) * count); vmaxset(vmax); UNPROTECT(1); return indx; } count = 0; /* we only need to scan s once even if we recycle, just remember the total count as well as the count for the last incomplete chunk (if any) */ i1 = (ns < nmax) ? (nmax % ns) : 0; if (i1 > 0) { /* last recycling chunk is incomplete - we have to get the truncated count as well */ R_xlen_t rem = 0; for (i = 0; i < ns; i++) { if (i == i1) rem = count; if (ps[i]) count++; } count = count * (nmax / ns) + rem; } else { /* nested recycling, total is sufficient */ for (i = 0; i < ns; i++) if (ps[i]) count++; count *= nmax / ns; } PROTECT(indx = allocVector(INTSXP, count)); int *pindx = INTEGER(indx); count = 0; MOD_ITERATE_CHECK(NINTERRUPT, nmax, ns, nmax, i, i1, i2, if (ps[i1]) { if (ps[i1] == NA_LOGICAL) pindx[count++] = NA_INTEGER; else pindx[count++] = (int)(i + 1); }); UNPROTECT(1); return indx; } static SEXP negativeSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, SEXP call) { SEXP indx; R_xlen_t stretch = 0; R_xlen_t i; PROTECT(indx = allocVector(LGLSXP, nx)); int *pindx = LOGICAL(indx); for (i = 0; i < nx; i++) pindx[i] = 1; const int *ps = INTEGER_RO(s); for (i = 0; i < ns; i++) { int ix = ps[i]; if (ix != 0 && ix != NA_INTEGER && -ix <= nx) pindx[-ix - 1] = 0; } s = logicalSubscript(indx, nx, nx, &stretch, call); UNPROTECT(1); return s; } static SEXP positiveSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx) { SEXP indx; R_xlen_t i, zct = 0; const int *ps = INTEGER_RO(s); for (i = 0; i < ns; i++) if (ps[i] == 0) zct++; if (zct) { indx = allocVector(INTSXP, (ns - zct)); int *pindx = INTEGER(indx); for (i = 0, zct = 0; i < ns; i++) if (ps[i] != 0) pindx[zct++] = ps[i]; return indx; } else return s; } static SEXP integerSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, R_xlen_t *stretch, SEXP call) { R_xlen_t i; int ii, neg, max, canstretch; Rboolean isna = FALSE; canstretch = *stretch > 0; *stretch = 0; neg = FALSE; max = 0; const int *ps = INTEGER_RO(s); for (i = 0; i < ns; i++) { ii = ps[i]; if (ii < 0) { if (ii == NA_INTEGER) isna = TRUE; else neg = TRUE; } else if (ii > max) max = ii; } if (max > nx) { if(canstretch) *stretch = max; else { ECALL(call, _("subscript out of bounds")); } } if (neg) { if (max == 0 && !isna) return negativeSubscript(s, ns, nx, call); else { ECALL(call, _("only 0's may be mixed with negative subscripts")); } } else return positiveSubscript(s, ns, nx); return R_NilValue; } static SEXP realSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, R_xlen_t *stretch, SEXP call) { R_xlen_t i; int canstretch; double ii, min, max; Rboolean isna = FALSE; canstretch = *stretch > 0; *stretch = 0; min = 0; max = 0; const double *ps = REAL_RO(s); for (i = 0; i < ns; i++) { ii = ps[i]; if (R_FINITE(ii)) { if (ii < min) min = ii; if (ii > max) max = ii; } else isna = TRUE; } if (max > nx) { #ifndef LONG_VECTOR_SUPPORT if (max > INT_MAX) { ECALL(call, _("subscript too large for 32-bit R")); } #endif if(canstretch) *stretch = (R_xlen_t) max; else { ECALL(call, _("subscript out of bounds")); } } if (min < 0) { if (max == 0 && !isna) { SEXP indx; R_xlen_t stretch = 0; double dx; R_xlen_t i, ix; PROTECT(indx = allocVector(LGLSXP, nx)); int *pindx = LOGICAL(indx); for (i = 0; i < nx; i++) pindx[i] = 1; for (i = 0; i < ns; i++) { dx = ps[i]; if (R_FINITE(dx) && dx != 0 && -dx <= nx) { ix = (R_xlen_t)(-dx - 1); pindx[ix] = 0; } } s = logicalSubscript(indx, nx, nx, &stretch, call); UNPROTECT(1); return s; } else { ECALL(call, _("only 0's may be mixed with negative subscripts")); } } else { /* Only return a REALSXP index if we need to */ SEXP indx; R_xlen_t i, cnt = 0; Rboolean int_ok = TRUE; /* NB, indices will be truncated eventually, so need to do that to take '0' into account */ for (i = 0; i < ns; i++) { double ds = ps[i]; #ifdef OLDCODE_LONG_VECTOR if (!R_FINITE(ds)) { if (ds > INT_MAX) int_ok = FALSE; cnt++; } else if ((R_xlen_t) ds != 0) cnt++; #else if (R_FINITE(ds) && ds > INT_MAX) int_ok = FALSE; if (!R_FINITE(ds) || (R_xlen_t) ds != 0) cnt++; #endif } if (int_ok) { indx = allocVector(INTSXP, cnt); int *pindx = INTEGER(indx); for (i = 0, cnt = 0; i < ns; i++) { double ds = ps[i]; int ia; if (!R_FINITE(ds)) ia = NA_INTEGER; else ia = (int) ds; if (ia != 0) pindx[cnt++] = ia; } } else { indx = allocVector(REALSXP, cnt); double *pindx = REAL(indx); for (i = 0, cnt = 0; i < ns; i++) { double ds = ps[i]; if (!R_FINITE(ds) || (R_xlen_t) ds != 0) pindx[cnt++] = ds; } } return indx; } return R_NilValue; } /* This uses a couple of horrible hacks in conjunction with * VectorAssign (in subassign.c). If subscripting is used for * assignment, it is possible to extend a vector by supplying new * names, and we want to give the extended vector those names, so they * are returned as the use.names attribute. Also, unset elements of the vector * of new names (places where a match was found) are indicated by * setting the element of the newnames vector to NULL. */ /* The original code (pre 2.0.0) used a ns x nx loop that was too * slow. So now we hash. Hashing is expensive on memory (up to 32nx * bytes) so it is only worth doing if ns * nx is large. If nx is * large, then it will be too slow unless ns is very small. */ static SEXP stringSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, SEXP names, R_xlen_t *stretch, SEXP call) { SEXP indx, indexnames; R_xlen_t i, j, nnames, extra, sub; int canstretch = *stretch > 0; /* product may overflow, so check factors as well. */ Rboolean usehashing = ( ((ns > 1000 && nx) || (nx > 1000 && ns)) || (ns * nx > 15*nx + ns) ); PROTECT(s); PROTECT(names); PROTECT(indexnames = allocVector(VECSXP, ns)); nnames = nx; extra = nnames; /* Process each of the subscripts. First we compare with the names * on the vector and then (if there is no match) with each of the * previous subscripts, since (if assigning) we may have already * added an element of that name. (If we are not assigning, any * nonmatch will have given an error.) */ if(usehashing) { /* must be internal, so names contains a character vector */ /* NB: this does not behave in the same way with respect to "" and NA names: they will match */ PROTECT(indx = match(names, s, 0)); /**** guaranteed to be fresh???*/ /* second pass to correct this */ int *pindx = INTEGER(indx); for (i = 0; i < ns; i++) if(STRING_ELT(s, i) == NA_STRING || !CHAR(STRING_ELT(s, i))[0]) pindx[i] = 0; for (i = 0; i < ns; i++) SET_VECTOR_ELT(indexnames, i, R_NilValue); } else { PROTECT(indx = allocVector(INTSXP, ns)); int *pindx = INTEGER(indx); for (i = 0; i < ns; i++) { sub = 0; if (names != R_NilValue) { for (j = 0; j < nnames; j++) { SEXP names_j = STRING_ELT(names, j); if (NonNullStringMatch(STRING_ELT(s, i), names_j)) { sub = j + 1; SET_VECTOR_ELT(indexnames, i, R_NilValue); break; } } } pindx[i] = (int) sub; } } int *pindx = INTEGER(indx); for (i = 0; i < ns; i++) { sub = pindx[i]; if (sub == 0) { for (j = 0 ; j < i ; j++) if (NonNullStringMatch(STRING_ELT(s, i), STRING_ELT(s, j))) { sub = pindx[j]; SET_VECTOR_ELT(indexnames, i, STRING_ELT(s, j)); break; } } if (sub == 0) { if (!canstretch) { ECALL(call, _("subscript out of bounds")); } extra += 1; sub = extra; SET_VECTOR_ELT(indexnames, i, STRING_ELT(s, i)); } pindx[i] = (int) sub; } /* We return the new names as the names attribute of the returned subscript vector. */ if (extra != nnames) setAttrib(indx, R_UseNamesSymbol, indexnames); if (canstretch) *stretch = extra; UNPROTECT(4); return indx; } /* Array Subscripts. dim is the dimension (0 to k-1) s is the subscript list, dims is the dimensions of x dng is a function (usually getAttrib) that obtains the dimnames x is the array to be subscripted. */ attribute_hidden SEXP int_arraySubscript(int dim, SEXP s, SEXP dims, SEXP x, SEXP call) { int nd, ns; R_xlen_t stretch = 0; SEXP dnames, tmp; ns = length(s); nd = INTEGER_ELT(dims, dim); switch (TYPEOF(s)) { case NILSXP: return allocVector(INTSXP, 0); case LGLSXP: return logicalSubscript(s, ns, nd, &stretch, call); case INTSXP: return integerSubscript(s, ns, nd, &stretch, call); case REALSXP: /* We don't yet allow subscripts > R_SHORT_LEN_MAX */ PROTECT(tmp = coerceVector(s, INTSXP)); tmp = integerSubscript(tmp, ns, nd, &stretch, call); UNPROTECT(1); return tmp; case STRSXP: dnames = getAttrib(x, R_DimNamesSymbol); if (dnames == R_NilValue) { ECALL(call, _("no 'dimnames' attribute for array")); } dnames = VECTOR_ELT(dnames, dim); return stringSubscript(s, ns, nd, dnames, &stretch, call); case SYMSXP: if (s == R_MissingArg) return nullSubscript(nd); default: ECALL3(call, _("invalid subscript type '%s'"), type2char(TYPEOF(s))); } return R_NilValue; } /* This is used by packages arules, cba, proxy and seriation. */ typedef SEXP AttrGetter(SEXP x, SEXP data); typedef SEXP (*StringEltGetter)(SEXP x, int i); SEXP arraySubscript(int dim, SEXP s, SEXP dims, AttrGetter dng, StringEltGetter strg, SEXP x) { return int_arraySubscript(dim, s, dims, x, R_NilValue); } /* Subscript creation. The first thing we do is check to see */ /* if there are any user supplied NULL's, these result in */ /* returning a vector of length 0. */ /* if stretch is zero on entry then the vector x cannot be "stretched", otherwise, stretch returns the new required length for x */ SEXP attribute_hidden makeSubscript(SEXP x, SEXP s, R_xlen_t *stretch, SEXP call) { if (! (isVector(x) || isList(x) || isLanguage(x))) { ECALL(call, _("subscripting on non-vector")); } R_xlen_t nx = xlength(x); /* special case for simple indices -- does not duplicate */ if (IS_SCALAR(s, INTSXP)) { int i = SCALAR_IVAL(s); if (0 < i && i <= nx) { *stretch = 0; return s; } } else if (IS_SCALAR(s, REALSXP)) { double di = SCALAR_DVAL(s); if (1 <= di && di <= nx) { *stretch = 0; /* We could only return a REALSXP if the value is too large for an INTSXP, but, as the calling code can handle REALSXP indices, returning the REALSXP avoids an allocation. */ return s; } } R_xlen_t ns = xlength(s); SEXP ans = R_NilValue; switch (TYPEOF(s)) { case NILSXP: *stretch = 0; ans = allocVector(INTSXP, 0); break; case LGLSXP: ans = logicalSubscript(s, ns, nx, stretch, call); break; case INTSXP: ans = integerSubscript(s, ns, nx, stretch, call); break; case REALSXP: ans = realSubscript(s, ns, nx, stretch, call); break; case STRSXP: { SEXP names = getAttrib(x, R_NamesSymbol); /* *stretch = 0; */ ans = stringSubscript(s, ns, nx, names, stretch, call); break; } case SYMSXP: *stretch = 0; if (s == R_MissingArg) { ans = nullSubscript(nx); break; } default: ECALL3(call, _("invalid subscript type '%s'"), type2char(TYPEOF(s))); } return ans; }